package com.dark.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.HashMap;
import java.util.Map;

/**
 * Created by tengxue on 16-4-19.
 * First Java class written while learning jsoup: fetches a page by URL and then parses it.
 */
public class JsoupTest {

    /** Raw {@code Cookie} header captured from a browser session, as {@code name=value} pairs separated by ';'. */
    private static final String COOKIE_HEADER = "BAIDU_SSP_lcr=https://www.baidu.com/; bdshare_firstime=1447321806354; vjuids=341655a81.150fb1a27b9.0.1dd4161d; vjlast=1447321807.1454744466.23; ADVS=33e80292a285b1; ASL=16910,00rzk,b6963b18b6963b18b6963b18; __utmt=1; __utma=194262068.2097614054.1447321806.1454744466.1461032446.3; __utmb=194262068.2.10.1461032446; __utmc=194262068; __utmz=194262068.1461032446.3.3.utmcsr=baidu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; HexunTrack=SID=2015111217500707437a0f820b33c4a0680f5c159c41efd83&CITY=51&TOWN=510100; ADVC=336b50b280b5a9; Hm_lvt_564ecf0af5b437b46cf31d75287f1f4b=1461032446; Hm_lpvt_564ecf0af5b437b46cf31d75287f1f4b=1461032996";

    /**
     * Article page that is fetched and parsed.
     * Numbers previously noted next to this URL (presumably other article ids that were tried):
     * 183401094, 183401210, 183332013.
     */
    private static final String ARTICLE_URL = "http://tech.hexun.com/2016-04-15/183332013.html";

    /** User-Agent header sent with the request. */
    private static final String USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36";

    /** Id of the element whose {@code <p>} children are printed as article text. */
    private static final String ARTICLE_BODY_ID = "artibody";

    /** Id of the element whose {@code <a>} children are printed as links. */
    private static final String HEADER_NAV_ID = "headerNav_2014";

    /**
     * Fetches {@link #ARTICLE_URL}, then prints every paragraph inside the article body,
     * the page title, and every link inside the header navigation element.
     *
     * @param arg command-line arguments (unused)
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] arg) throws Exception{
        // The cookies are parsed but deliberately NOT attached to the request;
        // pass this map to the connection's cookies(...) method to send them.
        Map<String,String> cookies = parseCookies(COOKIE_HEADER);

        Document doc = Jsoup.connect(ARTICLE_URL)
                .userAgent(USER_AGENT)
                .get();
        String title = doc.title();

        // getElementById returns null when no element carries the id, so guard before use.
        Element content = doc.getElementById(ARTICLE_BODY_ID);
        if (content == null) {
            System.err.println("element not found: #" + ARTICLE_BODY_ID);
        } else {
            Elements contents = content.getElementsByTag("p");
            for (Element entity : contents) {
                String contentText = entity.text();

                System.out.println("contentText:"+contentText);

            }
        }

        System.out.println("title is:"+title);

        Element someLink = doc.getElementById(HEADER_NAV_ID);
        if (someLink == null) {
            System.err.println("element not found: #" + HEADER_NAV_ID);
        } else {
            Elements links = someLink.getElementsByTag("a");
            for (Element link : links) {
                String linkHref = link.attr("href");
                String linkText = link.text();

                System.out.println("linkHref:"+linkHref+"------>"+"linkText:"+linkText);

            }
        }
    }

    /**
     * Parses a {@code Cookie} header string into a name-to-value map.
     *
     * <p>Each ';'-separated segment is split at its FIRST '=' only, so values that
     * themselves contain '=' (for example {@code HexunTrack=SID=...&CITY=51}) are kept
     * intact. Segments that are empty or have no name before the '=' are skipped.
     *
     * @param header the raw header text; may be {@code null}
     * @return a mutable map of cookie names to values; empty when {@code header} is {@code null}
     */
    private static Map<String, String> parseCookies(String header) {
        Map<String, String> cookies = new HashMap<String, String>();
        if (header == null) {
            return cookies;
        }
        for (String segment : header.split(";")) {
            String pair = segment.trim();
            int separator = pair.indexOf('=');
            if (separator <= 0) {
                // No '=' at all, or nothing before it: not a usable name=value pair.
                continue;
            }
            String name = pair.substring(0, separator).trim();
            String value = pair.substring(separator + 1).trim();
            cookies.put(name, value);
        }
        return cookies;
    }
}
